/*
Purpose: Clean and normalize reading data from NAEP data explorer
*/

// Reset the session, then load the raw NAEP export. The first row of the
// requested cell range (row 9) supplies the variable names.
clear all

import excel "data/raw/state_scale_score_read.xls", cellrange("A9:E698") firstrow clear

// Switch the spreadsheet headers to the short lowercase names used below,
// and discard the "All students" column, which is not used afterwards.
rename (Year Jurisdiction Averagescalescore Standarddeviation) ///
       (year state_name scale_score sd_scale)
drop Allstudents

// Snapshot of the renamed raw data in a temporary file.
// NOTE(review): nothing in the visible portion of this script reads it back.
tempfile score_sd
save `score_sd', replace

// 1998 appears twice in the export. The footnoted copy ("1998¹") marks the
// administration where no accommodations were permitted, which is less similar
// to the current version of the test, so only the plain 1998 rows are kept.
keep if year != "1998¹"

// With the footnoted label gone, year can be converted from text to a number.
destring year, replace

// 2022 is not used in our analysis.
keep if year != 2022

// Reference point for normalization: the 1998 "National public" mean and SD,
// stored in the globals mean98 and sd98 for use later in the script.
//
// Exactly one matching row must exist. With zero rows the values read from
// observation 1 would be missing, and with several rows an arbitrary one would
// be used; either way the normalized scores would be silently wrong.
count if state_name == "National public" & year == 1998
if r(N) != 1 {
	display as error "expected exactly 1 National public row for 1998, found " r(N)
	exit 459
}

// Temporarily reduce the data to that single row so it sits in observation 1,
// read the two values, then bring the full dataset back.
preserve
keep if state_name == "National public" & year == 1998
global mean98 = real(scale_score[1])
global sd98 = real(sd_scale[1])
restore

// real() yields missing when the cell holds non-numeric text (for example a
// suppression symbol), and a non-positive SD cannot serve as a divisor.
if missing($mean98, $sd98) | $sd98 <= 0 {
	display as error "1998 National public mean/SD is missing or unusable: mean=$mean98 sd=$sd98"
	exit 459
}

// Rows whose score cell holds one of the two non-numeric symbols carry no
// usable value; remove them so the column can be converted to numeric.
drop if inlist(scale_score, "—", "‡")
destring scale_score, replace

// Express each score as its distance from the 1998 national-public mean,
// measured in 1998 national-public standard deviations.
gen scale_score_norm = (scale_score - $mean98) / $sd98

// Only the normalized score is carried forward.
drop scale_score sd_scale

// Attach state identifiers by name using the user-written statastates command.
// NOTE(review): the comparisons below use an upper-case "NATIONAL PUBLIC", so
// statastates presumably upper-cases state_name before merging -- confirm.
statastates, name(state_name)

// Keep matched states plus the national aggregate; everything else is dropped.
drop if _merge != 3 & state_name != "NATIONAL PUBLIC"

// The national aggregate has no state FIPS code of its own; code it as 0.
replace state_fips = 0 if state_name == "NATIONAL PUBLIC"
drop _merge

save "data/clean/read8_normed", replace
